# -*- coding: utf-8 -*-

"""
 (c) 2023 - Copyright CTyunOS Inc

 Authors:
   youyifeng <youyf2@chinatelecom.cn>

"""

import json
import os

import requests


class Scraper:
    """Scrape CVE and security-advisory data from the openEuler security API.

    Every network call is retried up to ``try_time`` times; when all attempts
    fail, the process terminates with ``exit(1)`` (original behaviour, kept
    for compatibility with the surrounding tooling).
    """

    def __init__(self, try_time=10):
        """
        :param try_time: number of attempts for each HTTP request before
            giving up (default 10).
        """
        self.try_time = try_time

    def prune_sql_table(self):
        """Placeholder for future table-pruning logic; currently a no-op."""
        pass

    def _retry(self, send, api_url):
        """Run *send* up to ``self.try_time`` times and return its result.

        :param send: zero-argument callable performing one HTTP request (and
            any response decoding); any exception it raises counts as one
            failed attempt.
        :param api_url: URL string, used only in log messages.
        :return: whatever *send* returns on the first successful attempt.

        Terminates the process via ``exit(1)`` when every attempt fails.
        """
        for try_index in range(self.try_time):
            try:
                return send()
            except Exception as e:
                print("scrapy from api '%s' error!" % api_url, str(e))
                if try_index == self.try_time - 1:
                    # Bug fix: the original printed the literal "%d"
                    # placeholder without supplying an argument.
                    print("try [%d] times failed! exit." % self.try_time)
                    exit(1)
                print(" try again [%d/%d] " % (try_index + 1, self.try_time))

    def scrapyCVE(self, total=0):
        """Fetch the CVE database page by page (100 entries per page).

        :API: https://gitee.com/openeuler/website-v2/blob/master/web-ui/docs/.vuepress/api/security.js
        :param total: number of entries already present locally; scraping
            starts at page ``int(total / 100)`` so already-known pages are
            skipped.
        :return: merged JSON dict — ``result.cveDatabaseList`` accumulates the
            entries of every fetched page.  Shape of one page:

{
    "code": 0,
    "msg": "",
    "result": {
        "totalCount": 5329,
        "securityNoticeList": [],
        "cveDatabaseList": [
            {
                "id": 22370,
                "affectedProduct": "",
                "announcementTime": "",
                "attackComplexityNVD": "",
                "attackComplexityOE": "",
                "attackVectorNVD": "",
                "attackVectorOE": "",
                "availabilityNVD": "",
                "availabilityOE": "",
                "confidentialityNVD": "",
                "confidentialityOE": "",
                "cveId": "CVE-2023-26081",
                "cvsssCoreNVD": "7.5",
                "cvsssCoreOE": "7.5",
                "integrityNVD": "",
                "integrityOE": "",
                "nationalCyberAwarenessSystem": "",
                "packageName": "epiphany",
                "privilegesRequiredNVD": "",
                "privilegesRequiredOE": "",
                "scopeNVD": "",
                "scopeOE": "",
                "status": "Unaffected",
                "summary": "...",
                "type": "",
                "userInteractionNVD": "",
                "userInteractionOE": "",
                "updateTime": "2023-03-03 11:01:01",
                "createTime": "2023-03-03 11:01:01",
                "securityNoticeNo": "",
                "parserBean": null,
                "cvrf": null,
                "packageList": null
            },
            ...
        ]
    }
}
        """
        api_url = 'https://www.openeuler.openatom.cn/api-cve/cve-security-notice-server/cvedatabase/findAll'
        response_json_dict = {}
        # Hard upper bound of 300 pages guards against an endless loop should
        # the API never return an empty page.
        for page_number in range(int(total / 100), 300):
            print("更新第", page_number, "页CVE信息。")
            body = {"keyword": "", "status": "", "year": "", "score": "",
                    "pages": {"page": page_number, "size": 100}}
            # JSON decoding happens inside the retried callable so a garbled
            # response also triggers a retry (as in the original code).
            page_dict = self._retry(
                lambda: json.loads(requests.post(url=api_url, json=body, timeout=(10, 30)).text),
                api_url)
            if not response_json_dict:
                # First page: keep the whole envelope (code/msg/result).
                response_json_dict = page_dict
            else:
                response_json_dict["result"]["cveDatabaseList"] += \
                    page_dict["result"]["cveDatabaseList"]
            # An empty page signals the end of the data set.
            if page_dict == {} or ("result" in page_dict and page_dict["result"]["cveDatabaseList"] == []):
                break
        return response_json_dict

    def scrapySA(self):
        """Fetch security advisories (single request, page size 100).

        :API: https://gitee.com/openeuler/website-v2/blob/master/web-ui/docs/.vuepress/api/security.js
        :return: decoded JSON dict of the following shape:

{
    "code": 0,
    "msg": "",
    "result": {
        "totalCount": 1425,
        "securityNoticeList": [
            {
                "id": 2773,
                "affectedComponent": "rubygem-activerecord",
                "affectedProduct": "openEuler-22.03-LTS-SP1",
                "announcementTime": "2023-03-01",
                "cveId": "CVE-2022-44566;CVE-2023-22794;",
                "description": "",
                "introduction": "",
                "packageName": "",
                "referenceDocuments": "",
                "revisionHistory": "",
                "securityNoticeNo": "openEuler-SA-2023-1133",
                "subject": "",
                "summary": "rubygem-activerecord security update",
                "type": "High",
                "updateTime": "2023-03-01 19:38:25",
                "cvrf": null,
                "packageHelperList": [],
                "packageList": [],
                "referenceList": [],
                "cveList": []
            },
            ...
        ]
    }
}
        """
        api_url = 'https://www.openeuler.openatom.cn/api-cve/cve-security-notice-server/securitynotice/findAll'
        body = {
            "pages": {
                "page": 1,
                "size": 100
            },
            "keyword": "",
            "type": [],
            "date": [],
            "affectedProduct": [],
            "affectedComponent": "",
            "noticeType": "cve"
        }
        return self._retry(
            lambda: json.loads(requests.post(url=api_url, json=body, timeout=(10, 30)).text),
            api_url)

    def scrapy_CVRF_index(self):
        """Download the CVRF index file and return it split into lines.

        :API: https://repo.openeuler.org/security/data/cvrf/index.txt
        :return: list of relative CVRF file paths (one per line of index.txt)
        :raises RuntimeError: when the downloaded index is empty

        Terminates the process with ``exit(1)`` on a non-2xx status code.
        """
        api_url = 'https://repo.openeuler.openatom.cn/security/data/cvrf/index.txt'
        response = self._retry(
            lambda: requests.get(url=api_url, timeout=(10, 30)),
            api_url)
        if response.status_code < 200 or response.status_code > 299:
            print("ret code no in [200,300)")
            exit(1)
        index_list = response.text.split('\n')
        # str.split always yields at least one element, so test the body text
        # itself.  Bug fix: the original `raise "<str>"` is a TypeError in
        # Python 3 — raise an exception instance instead.
        if not response.text:
            raise RuntimeError(" failed to get cvrf list")
        return index_list

    def process_per_cvrf(self, urls_with_index):
        """Download one CVRF file and store it under a local prefix.

        :param urls_with_index: tuple ``(index, total, local_prefix, url)``
            where *url* is the file path relative to the CVRF root (as listed
            in index.txt) and *index*/*total* are used only for progress
            output.  Failures are reported on stdout and swallowed so a batch
            run continues with the next file.
        """
        # Function-local imports kept from the original (the method is
        # typically dispatched to worker processes).
        import shutil
        # Bug fix: `import urllib` alone does NOT import the `urllib.request`
        # submodule, so the original AttributeError'd on every download (and
        # the broad except below silently reported it as a failure).
        import urllib.request
        from urllib.parse import unquote, urljoin

        (index, total, local_prefix, url) = urls_with_index
        # Bug fix: os.path.join on a URL inserts backslashes on Windows;
        # urljoin is platform independent and equivalent here.
        download_url = unquote(
            urljoin("https://repo.openeuler.openatom.cn/security/data/cvrf/", url))
        dir_path = os.path.join(local_prefix, os.path.dirname(url))
        file_path = os.path.join(local_prefix, url)

        os.makedirs(dir_path, exist_ok=True)

        try:
            # The Range header currently always requests from offset 0 (the
            # whole file); kept so download resumption can be added later.
            request = urllib.request.Request(download_url)
            request.add_header("Range", "bytes=0-")
            with urllib.request.urlopen(request) as response, open(file_path, "wb") as file:
                shutil.copyfileobj(response, file)
        except Exception as e:
            print(f" [ {index}/{total} ] failed! {download_url} {str(e).strip()}")
            return

        print(f"[ {index}/{total} ] Download {download_url} save to: {file_path}")
